/*
Content: Additional variables + merge events + boil down to final dataset "light"
Paper: "Immigrants Return Intentions and Labor Market Behavior when the Home Country is Unsafe"
Journal: Journal of Labor Economics
Authors: Jacopo Bassetto, Teresa Freitas-Monteiro
*/

********************************************************************************
**# LOAD EMPLOYMENT SPELLS DATASET *********
********************************************************************************

* Start from the employment-spell file produced by the previous step.
clear
use "$intermediate/employmentspells.dta", clear



********************************************************************************
**# MERGE AKM CALCULATED BY THE STATISTICAL OFFICE FDZ FOR THE ENTIRE IEB DATA
********************************************************************************

* Attach the AKM file by betnr (many spells to one row in the using file).
* keep(1 3): retain master-only and matched rows, discard using-only rows.
merge m:1 betnr ///
	using "\\IAB.baintern.de\dfs\017\Ablagen\D01700-Projekte\D01700-Projekte-FDZ\Datensaetze\_Endprodukte\AKM-Effekte\AKM_8521\akm_bet_8521_v1.dta", ///
	keep(1 3)
drop _merge

* Remove the _bhp19* / _bhp20* variables if they are present.
capture drop _bhp19* _bhp20*

sort persnr begepi

* Calendar year in which the spell ends.
generate year = year(endepi)


		
***************************************
**# ADDITIONAL VARIABLES 
***************************************

* Date and match date
* match_date is the spell end date (endepi) when either
*   (a) the next employment spell starts more than 30 days after endepi, or
*   (b) the next "alo" spell starts before the next employment spell.
* All dates involved must be non-missing for the respective rule to apply.
generate match_date = endepi if ///
	(start_next_emp - endepi > 30 & start_next_emp != . & endepi != .) | ///
	(start_next_alo < start_next_emp & start_next_alo != . & start_next_emp != .)

* Spells ending before 1998 never receive a match date.
replace match_date = . if match_date != . & year(endepi) < 1998
format match_date %td


	* Month of the match date (monthly date); used below as a merge key.
	generate startmy = mofd(match_date)
	format startmy %tm
	clonevar ieb_nat_id = nat_first

	* Rebuild year/month from the match month; any earlier -year- is replaced.
	capture drop year
	generate date = dofm(startmy)
	generate year = year(date)
	generate month = month(date)


* Identify individuals registered as unemployed
* Flag = 1 when the next "alo" spell starts before the next employment spell,
* or when a positive number of job-search days is recorded.
generate register_unemp = 1 if ///
	(start_next_alo < start_next_emp & start_next_alo != . & start_next_emp != .) | ///
	(tot_days_jobsearch > 0 & tot_days_jobsearch != .)
* Every remaining observation with a match date is coded 0.
replace register_unemp = 0 if register_unemp != 1 & match_date != .

* Diagnostic only: tabulate how many rows share the same person and match date.
capture drop obs_rep
capture drop maxobs
bysort persnr match_date: generate maxobs = _N if match_date != .
tabulate maxobs
drop maxobs

* Single AKM from period AKMs
* Pick the period-specific effect (feff_<first>_<last>) whose window contains
* -year-. AKM stays missing when -year- is missing or outside 1985-2021.
* NOTE(review): -year- is derived from match_date earlier in this file, so AKM
* is only filled for spells that have a match date -- confirm this is intended.
generate AKM = .
foreach span in 1985_1992 1993_1999 2000_2006 2007_2013 2014_2021 {
	local first = substr("`span'", 1, 4)
	local last  = substr("`span'", 6, 4)
	replace AKM = feff_`span' if inrange(year, `first', `last')
}


* Proxy for ysm
* Whole years (365-day blocks, truncated) between entry_spell and endepi.
	generate ysm_atunemp = int((endepi - entry_spell) / 365)
label var ysm_atunemp "Years since mig."


* Work Location
* work_kreis is the workplace district code (ieb_ao_krs_num), left missing for
* the codes -9, -7, -5 and system missing (presumably "unknown/not applicable"
* codes -- confirm against the data documentation).
		cap drop work_kreis
		gen work_kreis = ieb_ao_krs_num if !inlist(ieb_ao_krs_num, -9, -7, -5, .)

	{	/*  Second fix codes for Kreise that disappeared */
		* Each rule maps one or more old codes onto one current code. No target
		* code is itself remapped by another rule, so the order is irrelevant.
		replace work_kreis = 3159 if inlist(work_kreis, 3152)
		replace work_kreis = 3241 if inlist(work_kreis, 3201, 3253)
		replace work_kreis = 5334 if inlist(work_kreis, 5313, 5354)
		* Fixed: the original read inlist(work_kreis,11000 11200) without a comma,
		* which is not a valid expression. Only 11200 needs remapping.
		replace work_kreis = 11000 if inlist(work_kreis, 11200)
		replace work_kreis = 13073 if inlist(work_kreis, 13001)
		replace work_kreis = 13071 if inlist(work_kreis, 13002)
		replace work_kreis = 13073 if inlist(work_kreis, 13005)
		replace work_kreis = 13076 if inlist(work_kreis, 13006)
		replace work_kreis = 13072 if inlist(work_kreis, 13051)
		replace work_kreis = 13075 if inlist(work_kreis, 13052)
		replace work_kreis = 14524 if inlist(work_kreis, 14066)
		replace work_kreis = 14014 if inlist(work_kreis, 14072)
		replace work_kreis = 14004 if inlist(work_kreis, 14079)
		replace work_kreis = 14627 if inlist(work_kreis, 14080)
		* 14090 was listed a second time on its own line; that rule was redundant.
		replace work_kreis = 14062 if inlist(work_kreis, 14081, 14090, 14092)
		replace work_kreis = 14511 if inlist(work_kreis, 14161)
		replace work_kreis = 14524 if inlist(work_kreis, 14166)
		replace work_kreis = 14524 if inlist(work_kreis, 14167)
		replace work_kreis = 14511 if inlist(work_kreis, 14171, 14173)
		replace work_kreis = 14523 if inlist(work_kreis, 14178)
		replace work_kreis = 14521 if inlist(work_kreis, 14181)
		replace work_kreis = 14511 if inlist(work_kreis, 14182)
		replace work_kreis = 14521 if inlist(work_kreis, 14188)
		replace work_kreis = 14521 if inlist(work_kreis, 14191)
		replace work_kreis = 14524 if inlist(work_kreis, 14193)
		replace work_kreis = 14713 if inlist(work_kreis, 14365)
		replace work_kreis = 14713 if inlist(work_kreis, 14374, 14375, 14379, 14383)
		replace work_kreis = 13072 if inlist(work_kreis, 13053)
		replace work_kreis = 13076 if inlist(work_kreis, 13054)
		replace work_kreis = 13071 if inlist(work_kreis, 13055)
		replace work_kreis = 13074 if inlist(work_kreis, 13056)
		replace work_kreis = 13073 if inlist(work_kreis, 13057)
		replace work_kreis = 13074 if inlist(work_kreis, 13058)
		replace work_kreis = 13073 if inlist(work_kreis, 13059)
		replace work_kreis = 13004 if inlist(work_kreis, 13060)
		* NOTE(review): the original had two rules for 13061 (-> 13004, then
		* -> 13073). Only the first could ever fire, so the effective mapping
		* 13061 -> 13004 is kept here. Confirm which target is intended.
		replace work_kreis = 13004 if inlist(work_kreis, 13061)
		replace work_kreis = 13075 if inlist(work_kreis, 13062)
		replace work_kreis = 14713 if inlist(work_kreis, 14389)
		replace work_kreis = 15001 if inlist(work_kreis, 15101, 15151)
		replace work_kreis = 15002 if inlist(work_kreis, 15153, 15154, 15159)
		replace work_kreis = 15091 if inlist(work_kreis, 15171)
		replace work_kreis = 15002 if inlist(work_kreis, 15202)
		replace work_kreis = 15084 if inlist(work_kreis, 15256)
		replace work_kreis = 15002 if inlist(work_kreis, 15260)
		replace work_kreis = 15088 if inlist(work_kreis, 15265, 15266, 15268)
		replace work_kreis = 15083 if inlist(work_kreis, 15355, 15357, 15358)
		replace work_kreis = 15090 if inlist(work_kreis, 15362, 15363, 15364)
		replace work_kreis = 15081 if inlist(work_kreis, 15367, 15369, 15370)
}

* Home Location
* home_kreis is the residence district code (ieb_wo_krs_num), left missing for
* the codes -9, -7, -5 and system missing (presumably "unknown/not applicable"
* codes -- confirm against the data documentation).
		cap drop home_kreis // guard added for consistency with work_kreis
		gen home_kreis = ieb_wo_krs_num if !inlist(ieb_wo_krs_num, -9, -7, -5, .)

		{	/* Second fix all inconsistent codes:*/
		* Same old-code -> current-code rules as applied to work_kreis. No target
		* code is itself remapped by another rule, so the order is irrelevant.

		* Added: this rule exists for work_kreis but was missing here.
		replace home_kreis = 3159 if inlist(home_kreis, 3152)
		replace home_kreis = 3241 if inlist(home_kreis, 3201, 3253)
		replace home_kreis = 5334 if inlist(home_kreis, 5313, 5354)
		* Fixed: the original read inlist(home_kreis,11000 11200) without a comma,
		* which is not a valid expression. Only 11200 needs remapping.
		replace home_kreis = 11000 if inlist(home_kreis, 11200)
		replace home_kreis = 13073 if inlist(home_kreis, 13001)
		replace home_kreis = 13071 if inlist(home_kreis, 13002)
		replace home_kreis = 13073 if inlist(home_kreis, 13005)
		replace home_kreis = 13076 if inlist(home_kreis, 13006)
		replace home_kreis = 13072 if inlist(home_kreis, 13051)
		replace home_kreis = 13075 if inlist(home_kreis, 13052)
		replace home_kreis = 14524 if inlist(home_kreis, 14066)
		replace home_kreis = 14014 if inlist(home_kreis, 14072)
		replace home_kreis = 14004 if inlist(home_kreis, 14079)
		replace home_kreis = 14627 if inlist(home_kreis, 14080)
		* 14090 was listed a second time on its own line; that rule was redundant.
		replace home_kreis = 14062 if inlist(home_kreis, 14081, 14090, 14092)
		replace home_kreis = 14511 if inlist(home_kreis, 14161)
		replace home_kreis = 14524 if inlist(home_kreis, 14166)
		replace home_kreis = 14524 if inlist(home_kreis, 14167)
		replace home_kreis = 14511 if inlist(home_kreis, 14171, 14173)
		replace home_kreis = 14523 if inlist(home_kreis, 14178)
		replace home_kreis = 14521 if inlist(home_kreis, 14181)
		replace home_kreis = 14511 if inlist(home_kreis, 14182)
		replace home_kreis = 14521 if inlist(home_kreis, 14188)
		replace home_kreis = 14521 if inlist(home_kreis, 14191)
		replace home_kreis = 14524 if inlist(home_kreis, 14193)
		replace home_kreis = 14713 if inlist(home_kreis, 14365)
		replace home_kreis = 14713 if inlist(home_kreis, 14374, 14375, 14379, 14383)
		replace home_kreis = 13072 if inlist(home_kreis, 13053)
		replace home_kreis = 13076 if inlist(home_kreis, 13054)
		replace home_kreis = 13071 if inlist(home_kreis, 13055)
		replace home_kreis = 13074 if inlist(home_kreis, 13056)
		replace home_kreis = 13073 if inlist(home_kreis, 13057)
		replace home_kreis = 13074 if inlist(home_kreis, 13058)
		replace home_kreis = 13073 if inlist(home_kreis, 13059)
		replace home_kreis = 13004 if inlist(home_kreis, 13060)
		* NOTE(review): the original had two rules for 13061 (-> 13004, then
		* -> 13073). Only the first could ever fire, so the effective mapping
		* 13061 -> 13004 is kept here. Confirm which target is intended.
		replace home_kreis = 13004 if inlist(home_kreis, 13061)
		replace home_kreis = 13075 if inlist(home_kreis, 13062)
		replace home_kreis = 14713 if inlist(home_kreis, 14389)
		replace home_kreis = 15001 if inlist(home_kreis, 15101, 15151)
		replace home_kreis = 15002 if inlist(home_kreis, 15153, 15154, 15159)
		replace home_kreis = 15091 if inlist(home_kreis, 15171)
		replace home_kreis = 15002 if inlist(home_kreis, 15202)
		replace home_kreis = 15084 if inlist(home_kreis, 15256)
		replace home_kreis = 15002 if inlist(home_kreis, 15260)
		replace home_kreis = 15088 if inlist(home_kreis, 15265, 15266, 15268)
		replace home_kreis = 15083 if inlist(home_kreis, 15355, 15357, 15358)
		replace home_kreis = 15090 if inlist(home_kreis, 15362, 15363, 15364)
		replace home_kreis = 15081 if inlist(home_kreis, 15367, 15369, 15370)
}
		
* German state where the job is located - home if work missing
* woao_kreis takes the workplace district and falls back to the residence
* district only when the workplace district is system missing.
	capture drop woao_kreis
	generate woao_kreis = cond(work_kreis == ., home_kreis, work_kreis)

	* State code = district code with the last three digits removed.
	* A result of 0 is treated as missing.
	generate state = int(woao_kreis / 1000) if int(woao_kreis / 1000) != 0

* Adjust education so that missing and no education are together
* (both end up in category 1).
replace edu_max = 1 if edu_max == 0 | edu_max == .

* Labels
label variable edu_max "Education"
label variable age "Age"
label variable frau "Female"
			
***************************************
*** FIRM AND JOB RELATED VARIABLES *** 
***************************************
		
* Unemployment duration
* Days between the end of this spell and the start of the next employment
* spell, for observations that have a match date.
generate dist_jobfa = start_next_emp - endepi ///
	if match_date != . & start_next_emp != . & endepi != .
* Duration is set to 0 when the next employment spell starts before the next
* "alo" spell and a positive number of job-search days is recorded.
replace dist_jobfa = 0 ///
	if start_next_alo > start_next_emp & start_next_alo != . & start_next_emp != . ///
	& tot_days_jobsearch > 0 & tot_days_jobsearch != .
label variable dist_jobfa "Unemp. dur."

* Unemployment spell: 1 month and 3 months
* Indicator = 1 if the duration is at most the threshold (30 / 92 days), 0 if
* it is longer, and missing if the duration itself is missing.
foreach days of numlist 30 92 {
	capture drop unemp_`days'm
	generate unemp_`days'm = (dist_jobfa <= `days') if dist_jobfa != .
}

* Rename the day-based names to month-based names.
rename unemp_30* unemp_1*
rename unemp_92* unemp_3*
label variable unemp_1m "Emp. within 1m"
label variable unemp_3m "Emp. within 3m"


* Wage last job before unemployment (bfu)
* The wage of the current spell (tentgelt) and its natural log.
clonevar wage_any_lb = tentgelt
generate ln_wage_any_lb = ln(wage_any_lb)
label variable ln_wage_any_lb "Ln hourly wage"

* Wage first job after unemployment (afu)
* The wage of the person's next spell (ordered by begepi), taken only when
* both the current and the next wage are non-missing.
bysort persnr (begepi): generate wage_any_fa = tentgelt[_n+1] ///
	if tentgelt != . & tentgelt[_n+1] != .
generate ln_wage_any_fa = ln(wage_any_fa)
label variable ln_wage_any_fa "Ln hourly wage"


* Full-time job
sort persnr begepi
* _lb: last job before unemployment (current spell)
clonevar teilzeit_lb = teilzeit
* _fa: first job after unemployment (next spell of the same person)
bysort persnr (begepi): generate teilzeit_fa = teilzeit[_n+1] ///
	if teilzeit != . & teilzeit[_n+1] != .


* full_time = 0 when teilzeit equals 1, 1 for any other non-missing value,
* and missing when teilzeit is missing.
* TODO (check JACO): what does a missing teilzeit mean, and why are there so
* many missings?
foreach job in "_fa" "_lb" {
	capture drop full_time`job'
	generate full_time`job' = (teilzeit`job' != 1) if teilzeit`job' != .
	label variable full_time`job' "Full-time employ."
}



* Change in occupation between first job after unemp. and last job before unemp.
* _lb = value in the current spell; _fa = value in the person's next spell
* (ordered by begepi), taken only when both values are non-missing.
cap drop ieb_beruf_kons_num_lb ieb_beruf_kons_num_fa
clonevar ieb_beruf_kons_num_lb = ieb_beruf_kons_num
bysort persnr (begepi): generate ieb_beruf_kons_num_fa = ieb_beruf_kons_num[_n+1] ///
	if ieb_beruf_kons_num != . & ieb_beruf_kons_num[_n+1] != .
* 1 = the codes differ, 0 = identical, missing if either side is missing.
capture drop diff_beruf_kons_num
	generate diff_beruf_kons_num = (ieb_beruf_kons_num_fa != ieb_beruf_kons_num_lb) ///
		if ieb_beruf_kons_num_fa != . & ieb_beruf_kons_num_lb != .
	label variable diff_beruf_kons_num "Change occup."

* Change in industry between first job after unemp. and last job before unemp.
* Same construction as for occupation, based on wz08_kons_num.
clonevar wz08_kons_num_lb = wz08_kons_num
bysort persnr (begepi): generate wz08_kons_num_fa = wz08_kons_num[_n+1] ///
	if wz08_kons_num != . & wz08_kons_num[_n+1] != .
	generate diff_indust_kons_num = (wz08_kons_num_fa != wz08_kons_num_lb) ///
		if wz08_kons_num_fa != . & wz08_kons_num_lb != .
label variable diff_indust_kons_num "Change industry"

* Company size

* az_ges in the current spell (_lb) and in the next spell (_fa), plus logs.
generate az_ges_lb = az_ges
bysort persnr (begepi): generate az_ges_fa = az_ges[_n+1] ///
	if az_ges != . & az_ges[_n+1] != .
generate ln_firmsize_lb = ln(az_ges)
generate ln_firmsize_fa = ln(az_ges_fa)
 label variable ln_firmsize_lb "Ln firm size"
 label variable ln_firmsize_fa "Ln firm size"
 
* Firm AKM above the median
* AKM_lb: AKM of the current spell (last job before unemployment).
* AKM_fa: AKM of the person's next spell ordered by begepi (first job after
*         unemployment), taken only when both values are non-missing.
clonevar AKM_lb=AKM // last job before unemployment
* Fixed: the condition used to read AKM`x'!=. with a loop macro `x' that is not
* defined at this point. It only worked because the macro expanded to nothing.
bysort persnr (begepi): g AKM_fa=AKM[_n+1] if AKM[_n+1]!=. & AKM!=. //  first job after unemployment

* For each version, flag values strictly above that variable's sample median
* (1 = above, 0 = at or below, missing when the AKM itself is missing).
	cap drop median
foreach z in "_fa"  "_lb" {
	sum AKM`z', d
	g median=r(p50) // temporary holder for the median, dropped at the end of each pass
	g abovemedian_AKM`z'=1 if AKM`z'>median & AKM`z'!=.
	replace abovemedian_AKM`z'=0 if abovemedian_AKM`z'==. & AKM`z'!=.
	label var abovemedian_AKM`z' "AKM above med."
	cap drop median
	}
 
	
	 
* Firm qualification above the median
* az_hq_lb: az_hq in the current spell (last job before unemployment).
* az_hq_fa: az_hq in the person's next spell (first job after unemployment),
*           taken only when both values are non-missing.
clonevar az_hq_lb = az_hq
bysort persnr (begepi): generate az_hq_fa = az_hq[_n+1] ///
	if az_hq != . & az_hq[_n+1] != .

* 1 = strictly above the sample median of the variable, 0 = at or below,
* missing when the variable is missing.
foreach job in "_fa" "_lb" {
	summarize az_hq`job', detail
	generate median = r(p50)
	generate qualif_hq`job'_ab_med = (az_hq`job' > median) if az_hq`job' != .
	label variable qualif_hq`job'_ab_med "Share HS above median"
	capture drop median
}


* Firm share of Germans above the median
* az_d in the current spell (_lb) and in the person's next spell (_fa); the
* lead is taken only when both values are non-missing.
clonevar az_d_lb = az_d
bysort persnr (begepi): generate az_d_fa = az_d[_n+1] ///
	if az_d != . & az_d[_n+1] != .

* Share = az_d / az_ges, computed separately for the _fa and _lb versions.
capture drop share_for*
foreach job in "_fa" "_lb" {
	capture drop az_for`job'
	generate share_for`job' = az_d`job' / az_ges`job' ///
		if az_d`job' != . & az_ges`job' != .
	label variable share_for`job' "Share of Germans in the firm"
}

* 1 = share strictly above its sample median, 0 = at or below, missing when
* the share is missing.
foreach job in "_fa" "_lb" {
	summarize share_for`job', detail
	generate median = r(p50)
	generate share_for`job'_ab_med = (share_for`job' > median) if share_for`job' != .
	label variable share_for`job'_ab_med "Share of Germans above median"
	capture drop median
}




	* Store the data set before the event data are merged in.
	save "$intermediate/finaldata_main_ieb_noevent.dta", replace

	
	
***************************************
*** MERGE EVENT DATA *** 
***************************************
	

* Merge the three event files (m3y, m4y, m5y) on match month and nationality id.
* keep(1 3 4 5) drops using-only rows. -update- fills values that are missing
* in the master data from the using file. The merge result of each pass is
* stored in merge_prepost_<window>.
foreach window in m3y m4y m5y {
	merge m:1 startmy ieb_nat_id ///
		using "$globalterror/final\prepost_setup_smonthly_ieb_`window'_final.dta", ///
		gen(merge_prepost_`window') keep(1 3 4 5) update
}

	
****************************************************
*** LIGHTER DATA VERSION + DATA CHECKS *** 
****************************************************

* Keep only observations being used
* Requires a match date, a match year between 2000 and 2018 (a missing year is
* excluded by inrange), and a nationality id that is neither missing nor 0.
keep if match_date != . & inrange(year, 2000, 2018) & !inlist(ieb_nat_id, ., 0)


* Keep rows where the outcome, all controls and at least one of the three
* treatment indicators (m3y / m4y / m5y) are non-missing.
capture drop allcontrols
keep if unemp_3m != . & edu_max != . & age != . & frau != . ///
	& ysm_atunemp != . & state != . & woao_kreis != . ///
	& (treatment_terror_m3y_p90 != . | treatment_terror_m4y_p90 != . | treatment_terror_m5y_p90 != .)


* Ensure that there are individuals in the control and treatment group within the same year and state
* date2 is the month of entry into unemployment for treated observations and
* that month + 3 for control observations. This defines the unemployment
* period so that we compare individuals who entered unemployment 3 months
* apart even if that was not in the same year (e.g., November 2012 and
* January 2013). The year of unemployment in the treatment group is the
* reference.
* NOTE(review): date2 is built from the m3y treatment indicator only, yet it
* also defines the cells for the m4y and m5y checks below -- confirm intended.
capture drop date2
generate date2 = cond(treatment_terror_m3y_p90 == 1, startmy, startmy + 3) ///
	if inlist(treatment_terror_m3y_p90, 0, 1)
format date2 %tm
generate year2 = year(dofm(date2))
label variable year2 "Year if unemp. treated"

foreach window in m3y m4y m5y {

	local treat treatment_terror_`window'_p90

	* k_<treat>_st = 1 when the cell (ieb_nat x date2 x state) holds at least
	* one treated (1) and one control (0) observation.
	* NOTE(review): ieb_nat is a variable-name abbreviation, presumably of
	* ieb_nat_id -- verify.
	cap drop max_`treat' min_`treat'
	bysort ieb_nat date2 state: egen max_`treat' = max(`treat')
	bysort ieb_nat date2 state: egen min_`treat' = min(`treat')
	capture drop k_`treat'_st
	generate k_`treat'_st = 1 if max_`treat' == 1 & min_`treat' == 0
	cap drop max_`treat' min_`treat'


	** Group variable to cluster standard errors at the event level
	* relevantterror_* is startmy for treated and startmy + 3 for controls;
	* clusterterror_* numbers its distinct values.
	clonevar relevantterror_`window'_p90 = startmy if `treat' == 1
	replace relevantterror_`window'_p90 = startmy + 3 if `treat' == 0

	egen clusterterror_`window'_p90 = group(relevantterror_`window'_p90)

}

* Final "light" data set.
save "$final/finaldata_ieb_main_light.dta", replace